How does hemoglobin concentration vary by age, race, and sex?
Example data: a National Health and Nutrition Examination Survey (NHANES) data set on anemia and iron status for n=3,990 patients surveyed in 1999-2000. The file was created by merging the demographic data with the complete blood count file and the nutritional biochemistry lab file.
# Setup ----

# devtools is used only to install the example data package from GitHub.
library("devtools")

# Install the data package only when it is missing, so that re-running the
# script does not reinstall from GitHub every time.
if (!requireNamespace("anemia", quietly = TRUE)) {
  devtools::install_github("alanbrookhart/anemia")
}

# Attach what the code below relies on: ggplot2 for every plot, dplyr for
# `%>%`, select(), group_by(), summarize(), n(), and rename().
library(ggplot2)
library(dplyr)

# NOTE(review): assumes the anemia package lazy-loads a data frame named
# `anemia` -- confirm; if it does not, call data(anemia) after attaching.
library(anemia)
# Distribution of hemoglobin ----

# Histogram with ggplot2's default binning.
ggplot(anemia, aes(hgb)) +
  geom_histogram()

# Same histogram with a fixed bin width of 0.05.
ggplot(anemia, aes(hgb)) +
  geom_histogram(binwidth = 0.05)

# Fill the bars by sex.
ggplot(anemia, aes(hgb, fill = sex)) +
  geom_histogram(binwidth = 0.05)

# Density curve for everyone, then one filled curve per sex.
ggplot(anemia, aes(hgb)) +
  geom_density()

ggplot(anemia, aes(hgb, fill = sex)) +
  geom_density()
# Hemoglobin versus age ----

# Plain scatter plot.
ggplot(anemia, aes(age, hgb)) +
  geom_point()

# Color by sex, with semi-transparent points.
ggplot(anemia, aes(age, hgb, color = sex)) +
  geom_point(alpha = 0.5)

# Jitter the points instead of drawing them at their exact positions.
ggplot(anemia, aes(age, hgb, color = sex)) +
  geom_jitter(alpha = 0.5)

# Map sex to point shape as well as color.
ggplot(anemia, aes(age, hgb, color = sex, shape = sex)) +
  geom_jitter(alpha = 0.5)

# Add a smoothed trend; color is a plot-level aesthetic, so there is one
# curve per sex.
ggplot(anemia, aes(age, hgb, color = sex)) +
  geom_jitter(alpha = 0.5) +
  geom_smooth()

# One panel per race.
ggplot(anemia, aes(age, hgb, color = sex)) +
  geom_jitter(alpha = 0.5) +
  geom_smooth() +
  facet_wrap(~race)

# Grid of panels: sex in rows, race in columns. Color is set per layer here
# rather than at the plot level.
ggplot(anemia, aes(age, hgb)) +
  geom_jitter(aes(color = sex), alpha = 0.5) +
  geom_smooth(aes(color = sex)) +
  facet_grid(sex ~ race)

# Point size mapped to 1 / iron; the smoother is drawn first, so the points
# sit on top of it.
ggplot(anemia, aes(age, hgb, color = sex)) +
  geom_smooth() +
  geom_jitter(aes(size = 1 / iron), alpha = 0.2) +
  facet_wrap(~race) +
  theme_bw()

# Same idea with readable axis labels.
ggplot(anemia, aes(age, hgb, color = sex)) +
  geom_smooth() +
  geom_jitter(aes(size = 1 / iron), alpha = 0.1) +
  labs(x = "Age", y = "Hemoglobin (g/dl)") +
  facet_wrap(~race)

# Fully labelled version: axis titles, legend titles, and a black-and-white
# theme.
ggplot(anemia, aes(age, hgb, color = sex)) +
  geom_smooth() +
  geom_jitter(aes(size = 1 / iron), alpha = 0.1) +
  labs(x = "Age", y = "Hemoglobin (g/dl)") +
  scale_size(name = "Iron Deficiency") +
  scale_color_discrete(name = "Sex") +
  facet_wrap(~race) +
  theme_bw()
# Alternatives to the scatter plot ----

# Hexagonal binning of age against hemoglobin.
ggplot(anemia, aes(age, hgb)) +
  geom_hex()

# Box plot of hemoglobin for each race.
ggplot(anemia, aes(race, hgb)) +
  geom_boxplot()

# Box plots split by sex, with the raw observations jittered on top.
ggplot(anemia, aes(race, hgb, color = sex)) +
  geom_boxplot() +
  geom_jitter(alpha = 0.1)

# Same plot with the axes swapped, so the boxes run horizontally.
ggplot(anemia, aes(race, hgb, color = sex)) +
  geom_boxplot() +
  geom_jitter(alpha = 0.1) +
  coord_flip()

# Violin plot per race with jittered observations.
ggplot(anemia, aes(race, hgb, color = race)) +
  geom_violin() +
  geom_jitter(alpha = 0.1)

# Violins grouped by sex on the x axis and colored by race.
ggplot(anemia, aes(sex, hgb, color = race)) +
  geom_violin()
# Group means with confidence intervals ----

#' Summarize hemoglobin by one grouping variable
#'
#' Computes the group size, mean, and standard deviation of `hgb`, plus an
#' interval of `mean -/+ z * sd / sqrt(n)`, for each level of `group_var`.
#'
#' @param data A data frame containing `hgb` and the grouping column.
#' @param group_var Unquoted name of the grouping column (e.g. `sex`).
#' @param z Multiplier applied to the standard error. The default, 1.96, is
#'   the value the plots below label as a 95% CI.
#' @return One row per group with columns `Type` (the group label), `mean`,
#'   `n`, `sd`, `lower`, and `upper`.
summarize_hgb <- function(data, group_var, z = 1.96) {
  data %>%
    # Keeping only the two needed columns also guarantees that `z` below
    # resolves to the function argument, not to a column of `data`.
    select({{ group_var }}, hgb) %>%
    group_by({{ group_var }}) %>%
    summarize(
      mean = mean(hgb),
      n = n(),
      sd = sd(hgb),
      # `mean`, `sd`, and `n` here are the summary columns defined just above.
      lower = mean - sd / sqrt(n) * z,
      upper = mean + sd / sqrt(n) * z
    ) %>%
    # A shared column name lets the per-variable summaries be stacked.
    rename(Type = {{ group_var }})
}

anemia1 <- summarize_hgb(anemia, sex)
anemia2 <- summarize_hgb(anemia, race)

# Stack the sex and race summaries into one table for plotting.
anemia3 <- rbind(anemia1, anemia2)

# Point = group mean, line = interval from `lower` to `upper`.
ggplot(data = anemia3, aes(x = Type, y = mean, ymin = lower, ymax = upper)) +
  geom_pointrange()

# Polished version: horizontal layout, small filled points, labelled axes.
ggplot(data = anemia3, aes(x = Type, y = mean, ymin = lower, ymax = upper)) +
  geom_pointrange(shape = 20) +
  coord_flip() +
  xlab("Demographics") +
  ylab("Mean Hemoglobin (95% CI)") +
  theme_bw()
# Themes ----
library(ggthemes)

# Everything the five plots below have in common: layers, labels, legend
# titles, and faceting. Only the theme added at the end differs.
hgb_age_unthemed <- ggplot(anemia, aes(age, hgb, color = sex)) +
  geom_smooth() +
  geom_jitter(aes(size = 1 / iron), alpha = 0.1) +
  labs(x = "Age", y = "Hemoglobin (g/dl)") +
  scale_size(name = "Iron Deficiency") +
  scale_color_discrete(name = "Sex") +
  facet_wrap(~race)

# ggplot2's built-in black-and-white theme, for reference.
hgb_age_unthemed + theme_bw()

# Themes supplied by ggthemes.
hgb_age_unthemed + theme_wsj()
hgb_age_unthemed + theme_economist()
hgb_age_unthemed + theme_solarized()
hgb_age_unthemed + theme_tufte()
# Color palettes and saving ----

# Show every ColorBrewer palette. The function lives in RColorBrewer, which
# this script never attaches, so it is called through its namespace.
RColorBrewer::display.brewer.all()

# Build the plot once, then both display and save it. The legend title is
# given to scale_color_brewer() directly: a plot keeps only one color scale,
# so adding the brewer scale after scale_color_discrete(name = "Sex")
# replaced that scale and silently dropped the "Sex" title.
myplot <- ggplot(data = anemia, aes(x = age, y = hgb, color = sex)) +
  geom_smooth() +
  geom_jitter(aes(size = 1 / iron), alpha = 0.1) +
  xlab("Age") +
  ylab("Hemoglobin (g/dl)") +
  scale_size(name = "Iron Deficiency") +
  scale_color_brewer(name = "Sex", palette = "Dark2") +
  facet_wrap(~race) +
  theme_bw()

myplot

# Write the plot to a JPEG file in the current working directory.
ggsave("hgb_age.jpeg", plot = myplot)
# Maps ----
library(maps)
library(mapproj)

# State outlines as a data frame of polygon vertices.
us.states <- map_data("state")

# Outline map: white fill, black borders.
ggplot(data = us.states, aes(x = long, y = lat, group = group)) +
  geom_polygon(fill = "white", color = "black")

# One fill color per state, legend suppressed. `linewidth` and
# guides(fill = "none") replace `size` and guides(fill = FALSE), which newer
# ggplot2 releases flag with deprecation warnings (`size` for lines as of
# 3.4.0, `FALSE` in guides() as of 3.3.4).
ggplot(data = us.states, aes(x = long, y = lat, group = group, fill = region)) +
  geom_polygon(color = "black", linewidth = 0.2) +
  guides(fill = "none")

# Same map drawn with an Albers projection.
ggplot(data = us.states, aes(x = long, y = lat, group = group, fill = region)) +
  geom_polygon(color = "black", linewidth = 0.2) +
  coord_map("albers", lat0 = 45.5, lat1 = 29.5) +
  guides(fill = "none")
# Choropleth of assault arrests ----

# Peek at the built-in arrests data; state names are stored as row names.
head(USArrests)
## Murder Assault UrbanPop Rape
## Alabama 13.2 236 58 21.2
## Alaska 10.0 263 48 44.5
## Arizona 8.1 294 80 31.0
## Arkansas 8.8 190 50 19.5
## California 9.0 276 91 40.6
## Colorado 7.9 204 78 38.7

# Lower-case the column names and expose the state name as a lower-cased
# `region` column to use as the join key with the map data.
arrests <- setNames(USArrests, tolower(names(USArrests)))
arrests$region <- tolower(rownames(USArrests))

# Attach the arrest figures to every map vertex. merge() keeps only regions
# present in both tables.
crime <- merge(us.states, arrests, by = "region", sort = FALSE)

# Put the rows back in the sequence given by the map data's `order` column
# (presumably the vertex drawing order -- the merge may have shuffled it).
crime <- crime[order(crime$order), ]

# Fill each state by its assault arrest figure, using an Albers projection.
ggplot(crime, aes(x = long, y = lat)) +
  geom_polygon(aes(fill = assault, group = group)) +
  coord_map("albers", lat0 = 45.5, lat1 = 29.5)
Let \(S_i(t) \in \mathcal{S}\) be the state of individual \(i\) at time \(t\), where \(\mathcal{S}\) is the set of possible states. Let \(C_i\) be the censoring time of individual \(i\).
Using Sankey diagrams, we are interested in depicting the proportion of the population in different states at a select number of times (via the height of vertically stacked boxes) and the proport
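If there were no censored observations, we could estimate state probabilities with simple averages. With censoring, each individual still under observation at time \(t\) is instead weighted by the inverse of their probability of remaining uncensored: \[ \widehat{\Pr}(S(t)=s)=\frac{1}{n}\sum_{i=1}^{n} \frac{I(C_i>t)\, I(S_i(t)=s)}{\Pr(C_i>t \mid X_i)}. \]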
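\[ \widehat{\Pr}(S(t)=s)=\frac{1}{n}\sum_{i=1}^{n} \frac{I(C_i>t)\, I(S_i(t)=s)}{\Pr(C_i>t \mid X_i)}, \] where \(\Pr(C_i>t \mid X_i)\), the probability that individual \(i\) with covariates \(X_i\) is still uncensored at time \(t\), can be estimated with a Cox proportional hazards regression.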